#!/usr/bin/python 

#Problem 1
import itertools

sizeOfPalindrome = 6
reversePalindromes = []

#essentially performs a "repeat"-dimensional cartesian product
halfPermutations = itertools.product('ATGC',repeat=sizeOfPalindrome/2) 

for permutation in halfPermutations:
	reversePalindromes.append(''.join(permutation)+''.join(reversed(['A' if b=='T' else 'C' if b=='G' else 'T' if b=='A' else 'G' for b in permutation])))

print reversePalindromes

#Problem 2 
import sys

with open(sys.argv[1], 'r') as sequenceFile:
	fullSequence = sequenceFile.read()

reversePalindroneFrequencies = {}

for reversePalindrone in reversePalindromes:
	reversePalindroneFrequencies[reversePalindrone] = fullSequence.count(reversePalindrone)

print reversePalindroneFrequencies


#Sorted order: ['CGTACG', 'CGATCG', 'TCGCGA', 'ACGCGT', 'GTCGAC', 'CGCGCG', 'ACCGGT', 'GCGCGC', 'GACGTC', 'CCGCGG', 'ATCGAT', 'TGCGCA', 'CGGCCG', 'TCCGGA', 'TTCGAA', 'AGCGCT', 'TACGTA', 'CTCGAG', 'GCCGGC', 'AACGTT', 'GGCGCC', 'CACGTG', 'GCTAGC', 'GGTACC', 'GTTAAC', 'GGATCC', 'ACTAGT', 'GTATAC', 'CCCGGG', 'GATATC', 'GTGCAC', 'CTATAG', 'GGGCCC', 'CAATTG', 'AGTACT', 'GCATGC', 'TAGCTA', 'CCTAGG', 'GAGCTC', 'CTTAAG', 'TGATCA', 'CCATGG', 'AGATCT', 'GAATTC', 'TGTACA', 'TCTAGA', 'AAGCTT', 'ATGCAT', 'CATATG', 'AGGCCT', 'TTGCAA', 'TCATGA', 'ACATGT', 'CAGCTG', 'TAATTA', 'CTGCAG', 'ATTAAT', 'TGGCCA', 'TTATAA', 'TATATA', 'ATATAT', 'AATATT', 'AAATTT', 'TTTAAA']

#Problem 3 
import matplotlib.pyplot
import re 

cutStrands = re.sub("(?<=CGTAC)(?=G)","jaretrulz",fullSequence).split("jaretrulz") #super hacky because re.split sucks
matplotlib.pyplot.hist([len(s) for s in cutStrands], bins=100, range=(0,3000000))
matplotlib.pyplot.show()

#Problem 4 

HapllLengths = [68, 114, 133, 162, 387, 557, 649, 813, 1737, 2012, 2325, 7342]
BstUILengths = [235, 316, 1403, 1455, 1562, 1589, 1633, 1666, 6440]
HapllAndBstUILengths = [9, 68, 99, 114, 133, 136, 162, 183, 387, 513, 557, 754, 813, 1032, 1455, 1562, 1633, 1666, 2012, 3011]

def powerSet(inputSet):
	powerSet = [[]]
	for element in inputSet:
		powerSet.extend([subset + [element] for subset in powerSet])
	return powerSet

def determineOrderOfSegments(digestionSetA, digestionSetB, combinedDigestionSets):
	powerSetHits = []
	for setToTry in powerSet(combinedDigestionSets):
		if sum(setToTry) in digestionSetA + digestionSetB:
			powerSetHits.extend([setToTry])
	
	solutionFound = False

	remainingLengthsToExplain = ["init"]

	while not remainingLengthsToExplain:
		remainingLengthsToExplain = digestionSetA + digestionSetB
		usedPowerSetElements = []
		for powerSetHit in powerSetHits:
			for powerSetElement in powerSetHit:
				if powerSetElement in usedPowerSetElements:
					break
			remainingLengthsToExplain.remove(sum(powerSetHit))
			usedPowerSetElements.extend(powerSetHit)
		print sorted(usedPowerSetElements)

determineOrderOfSegments(HapllLengths, BstUILengths, HapllAndBstUILengths)


